knitr::opts_chunk$set(fig.width=9, fig.height=8
                      #,results="asis"
                      ,echo = T
                      ,message = F
                      ,warning = F
                      )


# ws ---------------------------------------------------------------------------

rm(list = ls())

require(tidyverse)
require(sf)

# option setting
sf_use_s2(T)
options(tigris_use_cache = TRUE)

# dropbox dir or della base dir
ddir <- Sys.getenv('drop_dir')
# '/scratch/gpfs/km31/'

devtools::load_all()

ERGMs to predict movement across divisions in space

load data

# sample areas -----------------------------------------------------------------

library(geox)
arrs <- geox::rpops %>%
  filter(pop > 50e3
         ,rt == 'cz') %>%
  add.rns()

arrs %>%
  filter(pop > 500e3) %>%
  arrange(pop) %>%
  head(100) %>%
  pull(rn)

arrs %>%
  filter(grepl('Portland', rn))

czsf <- build.CZs('20100')
plcsf <- geox::places.wrapper(x = czsf)

library(mapview)
czsf %>% mapview() + mapview(plcsf, color = 'red')

# for analysis, i want probably cz/cbsa (or maybe sometimes Place)
# for visuals, i want region network cropped to bbox
# for sample, i'll use Place
smplc <- plcsf %>%
  filter(grepl('^Portland', name)) %>%
  st_transform(4326)
smplc %>% mapview()


# get block groups for place ---------------------------------------------------

bgs <- geox::nbhds.from.sf(x = st_bbox(smplc)
                            ,query.fcn = tigris::block_groups)
bgs[3] %>% plot()

# remove water areas (per census naming conventions)

sbgs <- bgs %>% filter(substr(tractce,1,2) != '99')

# subset to polygon, not bboxx
sbgs <- st_make_valid(sbgs)
sbgs <- geox::get.spatial.overlap(sbgs, smplc
                                  ,'geoid', 'placefp')
# filter original census query to keep all columns
sbgs <- bgs %>%
  filter(geoid %in% sbgs$geoid)

sbgs['awater'] %>% plot()

# just b/c for visuals, remove islands in ad hoc way
sbgs <- sbgs %>% filter(tractce != '002400')

bounds <- st_sf(geometry = st_union(sbgs))

# load safegraph for area ------------------------------------------------------

# function loads all CZs intersecting with area
sfg <- sfx2sfg(bounds
               ,min.flows = 10
               ,tracts.or.groups = 'bg'
               ,trim.loops = T
               ,year=2019)

#trim to just place
sfgt <- sfg %>%
  filter(across(c(origin, dest)
                , ~.x %in% sbgs$geoid))

# turn to (undirected) graph
ugh <- sfg2gh(sfgt, directed = F)

ugh

ghsf <- spatialize.graph(ugh,
                         frame.sf = NULL
                         ,tracts.or.groups = 'bg'
                         ,directed = F
                         ,year = 2019)
# transform
ghsf <- st_transform(ghsf, 4326)

Look at graph

Summarise edges (trips and tie strength); visualize

ghsf
sbgs

# flow summaries -------------------------------------------------------------

edges <- ghsf %>%
  activate(edges) %>%
  as_tibble()

tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map(summary)

tibble(edges)[Hmisc::Cs(n, tstr, dst)] %>% map( ~quantile(.x, seq(0,1,.1)))

# trim by flow str -------------------------------------------------------------

bounds
sbgs

# skipped; sample area seems small enough
trimmed.ghsf <- apply.flow.filters(ghsf
                                   ,frame.sf = NULL
                                   ,directed = F
                                   ,min.tie.str = NULL
                                   ,tie.str.deciles = 5 # filter out bottom x deciles
                                   ,min.flows = 10
                                   ,max.dst = units::set_units(20, 'miles'))

# map graph ------------------------------------------------------------

sbgs <- sbgs %>% st_transform(4326)

# get background for mapping
sttm <- visaux::get.stamen.bkg(
  sbgs
  , zoom = 12
  ,maptype = 'toner-background'
)

# lonlat matrix for notes
lonlats <- ghsf %>%
  activate('nodes') %>%
  as_tibble() %>%
  st_sf() %>%
  st_coordinates()

library(ggraph)

ggmap(sttm
      , base_layer =
        ggraph(trimmed.ghsf # ghsf #
               , layout = lonlats )
) +
  geom_edge_density(fill = '#00D0D0') +
  geom_edge_fan( aes( edge_alpha = tstr
                      ,edge_width = tstr)
                 ,color = '#008080'
  ) +
  flow.map.base() +
  visaux::bbox2ggcrop(bounds)

Generate divisions

Can use pre-loaded divisions. I kind of like just using the function to re-generate because it makes the process more self-contained and easier to adjust.

I allocate neighborhoods (defined as block groups) to sides-of-divisons based on interstates and all urban arterials for this example.

# merge in n'hood level ERGM controls -------------------------------

# (re)generate nhood divisions ----------------------------------------------

#' i was previously working with NHPN data (national highway planning network)
#' but it was often frustratingly messy. Switching to spatial census data, which is
#' much better, but more complex


#' Census codes for road features are referenced here:
#'
#' https://www2.census.gov/geo/pdfs/reference/mtfccs2019.pdf
#' (See MTFCCS: S1100, S1200, S1400)

sbgs
rds <- tigris::roads(state = unique(sbgs$statefp)
                     ,county = unique(sbgs$countyfp)) %>%
  rename_with(tolower)

rds <- rds %>% st_transform(4326)
rds <- st_crop(rds, smplc)

# just interstates
interstates <- rds %>% 
  filter(rttyp %in% 'I')

# all primary & secondary roads (limited-access and arterials) and hwy ramps
arterials <- rds %>%
  filter(mtfcc %in%
           Hmisc::Cs(S1100, S1200, S1630))

# call divM function
bounds$placefp <- smplc$placefp

xnbd <- divM::gen.cross.tract.dividedness(
  bounds
  ,interstates
  ,nbd.query.fcn = tigris::block_groups
  ,region.id.colm = 'placefp'
  ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect
  ,erase.water = F
  ,year = 2019
) %>%
  select(geoid, int.poly = poly.id)


xnbd.a <- divM::gen.cross.tract.dividedness(
  bounds
  ,arterials
  ,nbd.query.fcn = tigris::block_groups
  ,region.id.colm = 'placefp'
  ,fill.nhpn.gaps = F # this can help in some cases and not others... nhpn data is not perfect
  ,erase.water = F
  ,year = 2019
) %>%
  select(geoid, arterial.poly = poly.id)

# merge in w/ nodes
ghsf <-
  ghsf %>%
  activate('nodes') %>%
  left_join(xnbd
            , by = c('name' = 'geoid')) %>% 
    left_join(xnbd.a
            , by = c('name' = 'geoid'))


# vis
ghsf %>%
  activate('nodes') %>%
  as_tibble() %>% st_sf() %>% mapview(zcol = 'int.poly') +
  mapview(interstates) +
  mapview(bounds)

ghsf %>%
  activate('nodes') %>%
  as_tibble() %>% st_sf() %>% mapview(zcol = 'arterial.poly') +
  mapview(arterials) +
  mapview(bounds)

Add other characteristics

Keep simple for this example

# get other tract-level characteristics ----------------------------------------

demos <- sfg.seg::demos.2019 %>%
  select(1,2,matches('wh$|bl$|hsp$'))

ghsf <-
  ghsf %>%
  activate('nodes') %>%
  left_join(demos
            , by = c('name' = 'geoid'))

#region-level demos
demos %>%
  select(matches('pop|^n')) %>%
  colSums() %>% format(big.mark = ',')

# play with democgraphic flow visualization ------------------------------------

# could be better if not undirected
dghsf <- ghsf %>%
  activate('edges') %>%
  mutate(flow.n.bl =
           n * (.N()$n.bl[from] + .N()$n.bl[to]) / 2
         ,flow.n.hsp =
           n * (.N()$n.hsp[from] + .N()$n.hsp[to]) / 2
         ,flow.n.wh =
           n * (.N()$n.wh[from] + .N()$n.wh[to]) / 2
  )
# .....

Run an ERGM

set.seed(1234)

# final setup for ergm ---------------------------------------------------------

# ergm can't handle a lot of things: no factors, no geometry, no bundled units, no
# NAs, etc....

# de-spatialize
ugh <- ghsf %>%
  as_tbl_graph() %>%
  activate('edges') %>%
  select(-geometry) %>%
  activate('nodes') %>%
  select(-geometry)

# remove units (should be meters)
ugh <- ugh %>%
  activate('edges') %>%
  mutate(dst = as.numeric(dst))

# convert using statnet packages
# hard to manipulate, but built for the ergm implementation
devtools::load_all()
library(statnet)
eugh <- ergm.clean.n.convert(ugh)

# run ergm ---------------------------------------------------------------------

#install.packages("ergm.count")
#install.packages("ergm.rank")
#install.packages("latentnet")
#update.packages()
library(ergm)
library(ergm.rank)
library(latentnet)

eugh
fo.base <- formula(
  eugh ~ edges +
    nodematch("int.poly", diff = FALSE) +
    #nodematch("hwy.poly", diff = FALSE) +
    absdiff("perc.wh", pow=1)
)
ugh

# ergms take while to run. A good time to use Della
ergm1 <- ergm(formula = fo.base
              ,"n"
              ,reference = ~Poisson)

ergm1 %>% summary()

Incorporating Distance and Spatial Interaction Function

# generate spatial interaction fcn ---------------------------------------------

# my little SIF tutorial:
# https://rpubs.com/kmc39/sif

sfn <- ghsf %>%
  activate('nodes') %>%
  as_tibble()

sfe <- ghsf %>%
  activate('edges') %>%
  as_tibble()

# turn distance into numerics representing km
sfe <- sfe %>% mutate(dst = as.numeric(dst) / 1000 )

dst.mat <- build.pairwise.dst.matrix(sfn)
dst.mat %>% head() # values are kilometers

# binned relative frequencies (# of ties over distance, relative to crosswise
# distance distribution)
dst.freq <-
  relative.tie_dst.frequency(
    sfn, sfe
    , dst.mat = dst.mat
    , n.bins = 60
  )

dst.freq

dst.freq %>%
  ggplot(aes(x = dst.lvl,
             y = avg.flow))  +
  geom_path()

# log-log'd
dst.freq %>%
  ggplot(aes(x = log(dst.lvl),
             y = log(avg.flow))) +
  geom_path()

# SIF typically shows decreasing flows with distance, until a certain point where the
# relationship is lost. About 2km here.

# use flow bins to parameterize a power law SIF
# below relies on the `dst.freq` in the general environment
param.fitting <-
param.fitting <-
  stats::optim(par= c(1,1.7,1, 10)
               ,fn = function(x) power.law.loss.fcn(x, dst.freq)
               , hessian = T)

# predicted tie probabilities, based on SIF
dst.freq$flow.hat <-
  power.law(dst.freq$dst.lvl,
            param.fitting$par[1],
            param.fitting$par[2],
            param.fitting$par[3]
  )

dst.freq %>%
  select(dst.lvl, avg.flow, flow.hat) %>%
  pivot_longer(cols = contains("flow"),
               values_to = "avg.flow") %>%
  ggplot() +
  geom_path(aes(color = name,
                y = avg.flow,
                x = dst.lvl)) +
  ggtitle("Relative avg flows",
          "actual vs. fitted power law")

decay.mat <- dst.mat %>%
  power.law(param.fitting$par[1],
            param.fitting$par[2],
            param.fitting$par[3])

ERGMS with SIF

# ERGM with sif, only interstates ----------------------------------------------------------------

fo.sif <-
  formula(
    eugh ~ edges +
      nodematch("int.poly", diff = FALSE) +
      #nodematch("hwy.poly", diff = FALSE) +
      absdiff("perc.wh", pow=1) +
      edgecov(decay.mat, "dst")
  )
eugh

# ergms take while to run. A good time to use Della
ergm2 <- ergm(formula = fo.sif
              ,"tstr"
              ,reference = ~Poisson)

ergm2 %>% summary()


# ERGM with sif with arterials ----------------------------------------------------------------

fo.sif <-
  formula(
    eugh ~ edges +
      nodematch("arterial.poly", diff = FALSE) +
      #nodematch("hwy.poly", diff = FALSE) +
      absdiff("perc.wh", pow=1) +
      edgecov(decay.mat, "dst")
  )
eugh

# ergms take while to run. A good time to use Della
ergm2 <- ergm(formula = fo.sif
              ,"tstr"
              ,reference = ~Poisson)

ergm2 %>% summary()


kmcd39/divflow documentation built on Dec. 21, 2021, 7:38 a.m.